https://github.com/mokys1213/STATS-506-FA-2024/blob/main/HW5/HW5.html
# Rcpp library
library(Rcpp)
# Defining GCD and LCM functions
cppFunction('
int gcd(int a, int b) {
if (b == 0) return abs(a);
return gcd(b, a % b);
}
')
cppFunction('
int lcm(int a, int b) {
return abs(a*b) / std::gcd(a,b);
}
')
# Defining constructor
setClass("Rational",
slots=c(numerator="numeric", denominator="numeric"))
# Defining validator
setValidity("Rational", function(object){
if(object@denominator==0){
return("Denominator is non-zero")
}
TRUE
})
## Class "Rational" [in ".GlobalEnv"]
##
## Slots:
##
## Name: numerator denominator
## Class: numeric numeric
##' @title Create a `rational` object
##' @param numerator and denominator
##' @return rational
##' @export
create_rational<-function(numerator, denominator) {
invisible(new("Rational", numerator=numerator, denominator=denominator))
}
# Defining a show method
setMethod("show", "Rational",
function(object) {
cat(paste(object@numerator,"/",object@denominator,"\n"))
}
)
# Defining a simplify method
simplify <- function(x) stop("This function needs specfic definition")
setMethod("simplify", "Rational", function(x) {
if (x@numerator==0) {
print(0)
} else {
gcd_num <- gcd(x@numerator, x@denominator)
x@numerator= x@numerator/gcd_num
x@denominator=x@denominator/gcd_num
print(x)
}
})
## Creating a generic function from function 'simplify' in the global environment
# Defining quotient method
quotient <- function(x,...) stop("This function needs specfic definition")
setMethod("quotient", "Rational", function(x, digits = NULL) {
val <- x@numerator / x@denominator
if (!is.null(digits) && is.numeric(digits)) {
format_val <- format(val,digits=digits)
print(format_val)
} else {
print(val)
}
})
## Creating a generic function from function 'quotient' in the global environment
# Defining Addition
setMethod("+", signature(e1="Rational",
e2="Rational"),
function(e1, e2) {
common_denom=lcm(e1@denominator, e2@denominator)
numerator2=(e1@numerator*(common_denom/e1@denominator))+
(e2@numerator*(common_denom/e2@denominator))
simplify(create_rational(numerator2, common_denom))
})
# Defining Subtraction
setMethod("-", signature(e1="Rational",
e2="Rational"),
function(e1, e2) {
common_denom=lcm(e1@denominator, e2@denominator)
numerator2=(e1@numerator*(common_denom/e1@denominator))-
(e2@numerator*(common_denom/e2@denominator))
simplify(create_rational(numerator2, common_denom))
})
# Defining Multiplication
setMethod("*", signature(e1="Rational",
e2="Rational"),
function(e1, e2) {
numerator2=e1@numerator*e2@numerator
denominator2=e1@denominator*e2@denominator
simplify(create_rational(numerator2,denominator2))
})
# Defining Division
setMethod("/", signature(e1="Rational",
e2="Rational"),
function(e1, e2) {
numerator2=e1@numerator*e2@denominator
denominator2=e1@denominator*e2@numerator
simplify(create_rational(numerator2,denominator2))
})
# Creating Rational objects
r1 <- create_rational(24,6)
r2 <- create_rational(7,230)
r3 <- create_rational(0,4)
# Evaluating the following code
r1
## 24 / 6
r3
## 0 / 4
r1 + r2
## 927 / 230
## 927 / 230
r1 - r2
## 913 / 230
## 913 / 230
r1 * r2
## 14 / 115
## 14 / 115
r1 / r2
## 920 / 7
## 920 / 7
r1 + r3
## 4 / 1
## 4 / 1
r1 * r3
## [1] 0
## [1] 0
r2 / r3
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'simplify': invalid class "Rational" object: Denominator is non-zero
quotient(r1)
## [1] 4
quotient(r2)
## [1] 0.03043478
quotient(r2, digits = 3)
## [1] "0.0304"
quotient(r2, digits = 3.14)
## [1] "0.0304"
quotient(r2, digits = "avocado")
## [1] 0.03043478
q2 <- quotient(r2, digits = 3)
## [1] "0.0304"
q2
## [1] "0.0304"
quotient(r3)
## [1] 0
simplify(r1)
## 4 / 1
simplify(r2)
## 7 / 230
simplify(r3)
## [1] 0
# Loading packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Loading the dataset
p2=read_csv("df_for_ml_improved_new_market.csv")
## Rows: 4347 Columns: 112
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): eventdate
## dbl (111): id, case_id, year, height, width, size_inchsqr, price_usd, meanpr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Renaming genre related columns
p2=p2 %>% rename(Photography=Genre___Photography,Painting=Genre___Painting,
Sculpture=Genre___Sculpture,Print=Genre___Print,Others=Genre___Others)
# Making a long dataset that indicates genre for each painting
p2b=p2 %>% pivot_longer(cols=c(Photography,Painting,Sculpture,Print,Others),
names_to = "genre",values_to = "count") %>% filter(count %in% 1)
# Generating percentage within each year for each genre
p2b2=p2b %>% group_by(year, genre) %>% summarise(count = n()) %>% ungroup() %>%
group_by(year) %>% mutate(percent = count / sum(count) * 100)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Generating stacked bar plot using plotly
plot_ly(data=p2b2,x= ~year,y= ~percent,color= ~genre,type='bar') %>%
layout(title = "Distribution of Genre of Sales Across Years",xaxis=list(title="Year"),
yaxis=list(title="Percentage"),barmode="stack")
# Calculate average price per year for overall trend
overall_p=p2b %>% group_by(year) %>%
summarize(average_price = mean(price_usd, na.rm=TRUE)) %>% mutate(Genre ="Overall")
# Calculate average price per year and genre
genre_p=p2b %>% group_by(year,genre) %>%
summarize(average_price =mean(price_usd, na.rm=TRUE)) %>%
rename(Genre=genre) %>% ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Plotting overall trend using plotly
fig <- plot_ly() %>%
add_trace(data =overall_p, x = ~year, y = ~average_price, type ='scatter',
mode='lines+markers', visible =TRUE)
# Add traces for each genre, initially hidden
for (genre in unique(genre_p$Genre)) {
fig <- fig %>%
add_trace(data = subset(genre_p, Genre==genre), x = ~year, y = ~average_price, type ='scatter',
mode ='lines+markers',visible =FALSE)
}
# Creating dropdown buttons
buttons <- list(
list(method = "restyle", args =list("visible",c(TRUE,rep(FALSE,5))),label="Overall"))
# Adding the button to the plot
for (i in 1:5) {
genre <- unique(genre_p$Genre)[i]
visibility <- rep(FALSE, length(unique(genre_p$Genre)) + 1)
visibility[i+1]=TRUE
buttons <- append(buttons,list(list(method="restyle", args =list("visible", visibility),label=genre)))
}
# Adding layouts
fig %>% layout(title = "Average Sales Price Over Time",xaxis = list(title = "Year"),
yaxis = list(title = "Average Sales Price (USD)"),
updatemenus=list(list(type="dropdown",buttons=buttons,showactive=TRUE)),showlegend=FALSE)
# Loading packages
library(nycflights13)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Generate a table reporting the mean and median departure delay per airport
flights=data.table(flights)
p3a=merge(flights[, faa := origin],airports, by = "faa", all.x = TRUE)
p3a[,.(N =.N, mean_delay = mean(dep_delay, na.rm = TRUE), median_delay = median(dep_delay, na.rm = TRUE)),
by = name] |> _[N >=10, !"N"] |> _[order(mean_delay, decreasing = TRUE)]
# Generating a table reporting the mean and median arrival delay per airport
flights=data.table(flights)
p3a2=merge(flights[, faa := dest],airports,by = "faa",
all.x = TRUE)
p3a2[,.(name = ifelse(is.na(first(name)), first(faa), first(name)),
N = .N,mean_delay = mean(arr_delay, na.rm = TRUE),
median_delay = median(arr_delay, na.rm = TRUE)),by = faa][N >=10,!c("faa", "N")][order(-mean_delay)] |> print(x = _, nrows = 1e3)
## name mean_delay median_delay
## <char> <num> <num>
## 1: Columbia Metropolitan 41.76415094 28.0
## 2: Tulsa Intl 33.65986395 14.0
## 3: Will Rogers World 30.61904762 16.0
## 4: Jackson Hole Airport 28.09523810 15.0
## 5: Mc Ghee Tyson 24.06920415 2.0
## 6: Dane Co Rgnl Truax Fld 20.19604317 1.0
## 7: Richmond Intl 20.11125320 1.0
## 8: Akron Canton Regional Airport 19.69833729 3.0
## 9: Des Moines Intl 19.00573614 0.0
## 10: Gerald R Ford Intl 18.18956044 1.0
## 11: Birmingham Intl 16.87732342 -2.0
## 12: Theodore Francis Green State 16.23463687 1.0
## 13: Greenville-Spartanburg International 15.93544304 -0.5
## 14: Cincinnati Northern Kentucky Intl 15.36456376 -3.0
## 15: Savannah Hilton Head Intl 15.12950601 -1.0
## 16: Manchester Regional Airport 14.78755365 -3.0
## 17: Eppley Afld 14.69889841 -2.0
## 18: Yeager 14.67164179 -1.5
## 19: Kansas City Intl 14.51405836 0.0
## 20: Albany Intl 14.39712919 -4.0
## 21: General Mitchell Intl 14.16722038 0.0
## 22: Piedmont Triad 14.11260054 -2.0
## 23: Washington Dulles Intl 13.86420212 -3.0
## 24: Cherry Capital Airport 12.96842105 -10.0
## 25: James M Cox Dayton Intl 12.68048606 -3.0
## 26: Louisville International Airport 12.66938406 -2.0
## 27: Chicago Midway Intl 12.36422360 -1.0
## 28: Sacramento Intl 12.10992908 4.0
## 29: Jacksonville Intl 11.84483416 -2.0
## 30: Nashville Intl 11.81245891 -2.0
## 31: Portland Intl Jetport 11.66040210 -4.0
## 32: Greater Rochester Intl 11.56064461 -5.0
## 33: Hartsfield Jackson Atlanta Intl 11.30011285 -1.0
## 34: Lambert St Louis Intl 11.07846451 -3.0
## 35: Norfolk Intl 10.94909344 -4.0
## 36: Baltimore Washington Intl 10.72673385 -5.0
## 37: Memphis Intl 10.64531435 -2.5
## 38: Port Columbus Intl 10.60132291 -3.0
## 39: Charleston Afb Intl 10.59296847 -4.0
## 40: Philadelphia Intl 10.12719014 -3.0
## 41: Raleigh Durham Intl 10.05238095 -3.0
## 42: Indianapolis Intl 9.94043412 -3.0
## 43: Charlottesville-Albemarle 9.50000000 -5.0
## 44: Cleveland Hopkins Intl 9.18161129 -5.0
## 45: Ronald Reagan Washington Natl 9.06695204 -2.0
## 46: Burlington Intl 8.95099602 -4.0
## 47: Buffalo Niagara Intl 8.94595186 -5.0
## 48: Syracuse Hancock Intl 8.90392501 -5.0
## 49: Denver Intl 8.60650021 -2.0
## 50: Palm Beach Intl 8.56297210 -3.0
## 51: BQN 8.24549550 -1.0
## 52: Bob Hope 8.17567568 -3.0
## 53: Fort Lauderdale Hollywood Intl 8.08212154 -3.0
## 54: Bangor Intl 8.02793296 -9.0
## 55: Asheville Regional Airport 8.00383142 -1.0
## 56: PSE 7.87150838 0.0
## 57: Pittsburgh Intl 7.68099053 -5.0
## 58: Gallatin Field 7.60000000 -2.0
## 59: NW Arkansas Regional 7.46572581 -2.0
## 60: Tampa Intl 7.40852503 -4.0
## 61: Charlotte Douglas Intl 7.36031885 -3.0
## 62: Minneapolis St Paul Intl 7.27016886 -5.0
## 63: William P Hobby 7.17618819 -4.0
## 64: Bradley Intl 7.04854369 -10.0
## 65: San Antonio Intl 6.94537178 -9.0
## 66: South Bend Rgnl 6.50000000 -3.5
## 67: Louis Armstrong New Orleans Intl 6.49017497 -6.0
## 68: Key West Intl 6.35294118 7.0
## 69: Eagle Co Rgnl 6.30434783 -4.0
## 70: Austin Bergstrom Intl 6.01990875 -5.0
## 71: Chicago Ohare Intl 5.87661475 -8.0
## 72: Orlando Intl 5.45464309 -5.0
## 73: Detroit Metro Wayne Co 5.42996346 -7.0
## 74: Portland Intl 5.14157973 -5.0
## 75: Nantucket Mem 4.85227273 -3.0
## 76: Wilmington Intl 4.63551402 -7.0
## 77: Myrtle Beach Intl 4.60344828 -13.0
## 78: Albuquerque International Sunport 4.38188976 -5.5
## 79: George Bush Intercontinental 4.24079040 -5.0
## 80: Norman Y Mineta San Jose Intl 3.44817073 -7.0
## 81: Southwest Florida Intl 3.23814963 -5.0
## 82: San Diego Intl 3.13916574 -5.0
## 83: Sarasota Bradenton Intl 3.08243131 -5.0
## 84: Metropolitan Oakland Intl 3.07766990 -9.0
## 85: General Edward Lawrence Logan Intl 2.91439222 -9.0
## 86: San Francisco Intl 2.67289152 -8.0
## 87: SJU 2.52052659 -6.0
## 88: Yampa Valley 2.14285714 2.0
## 89: Phoenix Sky Harbor Intl 2.09704733 -6.0
## 90: Montrose Regional Airport 1.78571429 -10.5
## 91: Los Angeles Intl 0.54711094 -7.0
## 92: Dallas Fort Worth Intl 0.32212685 -9.0
## 93: Miami Intl 0.29905978 -9.0
## 94: Mc Carran Intl 0.25772849 -8.0
## 95: Salt Lake City Intl 0.17625459 -8.0
## 96: Long Beach -0.06202723 -10.0
## 97: Martha\\\\'s Vineyard -0.28571429 -11.0
## 98: Seattle Tacoma Intl -1.09909910 -11.0
## 99: Honolulu Intl -1.36519258 -7.0
## 100: STT -3.83590734 -9.0
## 101: John Wayne Arpt Orange Co -7.86822660 -11.0
## 102: Palm Springs Intl -12.72222222 -13.5
## name mean_delay median_delay
# Table of aircraft model with the fastest average speed
planes=data.table(planes)
p3b =merge(flights,planes,by = "tailnum",all.x = TRUE)
p3b2=p3b[, `:=`(n =.N,
average_mph = mean(distance/(air_time/60), na.rm = TRUE)),by = model]
p3b2[p3b2[,.I[which.max(average_mph)]],.(model, average_mph, n)]